/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tokenizers.en;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.languagetool.tokenizers.WordTokenizer;

/**
 * Word tokenizer for English text.
 *
 * <p>Compared with the inherited tokenizer this class:
 * <ol>
 *   <li>adds the en dash (U+2013) to the set of tokenizing characters;</li>
 *   <li>splits a trailing hyphen off a token into a token of its own;</li>
 *   <li>keeps the words listed in {@code EXCEPTIONS} intact even though they
 *       contain apostrophes.</li>
 * </ol>
 */
public class EnglishWordTokenizer
extends WordTokenizer {
    /** Words that must survive tokenization unsplit. */
    private static final String[] EXCEPTIONS = new String[]{"fo'c'sle"};
    /**
     * Placeholder spellings, index-aligned with {@code EXCEPTIONS}, in which
     * every apostrophe is swapped for U+2626 while the text is being split.
     */
    private static final String[] EXCEPTION_REPLACEMENT = new String[]{"fo\u2626c\u2626sle"};

    /**
     * Returns the inherited tokenizing characters plus the en dash (U+2013).
     *
     * @return the characters at which text is split
     */
    @Override
    public String getTokenizingCharacters() {
        return super.getTokenizingCharacters() + "\u2013";
    }

    /**
     * Splits {@code text} into tokens; delimiter characters are returned as
     * tokens too.
     *
     * <p>Exception words are masked with their placeholder spelling before
     * splitting and unmasked afterwards. A token longer than one character
     * that ends in {@code "-"} is emitted as two tokens: the token without its
     * last character, followed by {@code "-"}. The resulting list is finally
     * passed through the inherited {@code joinEMailsAndUrls}.
     *
     * @param text the text to tokenize; must not be {@code null}
     * @return the list of tokens as returned by {@code joinEMailsAndUrls}
     */
    @Override
    public List<String> tokenize(String text) {
        List<String> tokens = new ArrayList<String>();
        boolean hasException = false;
        // Mask exception words so they are not broken apart by the splitter
        // (presumably the apostrophe is one of the inherited tokenizing
        // characters -- the superclass is not visible here).
        for (int idx = 0; idx < EXCEPTIONS.length; ++idx) {
            if (text.contains(EXCEPTIONS[idx])) {
                hasException = true;
                text = text.replace(EXCEPTIONS[idx], EXCEPTION_REPLACEMENT[idx]);
            }
        }
        // 'true' makes the tokenizer return the delimiters as tokens as well.
        StringTokenizer st = new StringTokenizer(text, this.getTokenizingCharacters(), true);
        while (st.hasMoreElements()) {
            String token = st.nextToken();
            if (hasException) {
                token = restoreExceptions(token);
            }
            if (token.length() > 1 && token.endsWith("-")) {
                tokens.add(token.substring(0, token.length() - 1));
                tokens.add("-");
            } else {
                tokens.add(token);
            }
        }
        return this.joinEMailsAndUrls(tokens);
    }

    /**
     * Replaces every placeholder spelling inside {@code token} with the
     * original exception word.
     *
     * <p>Substring replacement (rather than an equality test on the whole
     * token) is required because the masked word may be only part of a token,
     * e.g. the plural "fo'c'sles"; otherwise the placeholder character would
     * leak into the output.
     */
    private static String restoreExceptions(String token) {
        String restored = token;
        for (int idx = 0; idx < EXCEPTIONS.length; ++idx) {
            restored = restored.replace(EXCEPTION_REPLACEMENT[idx], EXCEPTIONS[idx]);
        }
        return restored;
    }
}

